ncbi https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE147507

sra run selector https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJNA615032&o=acc_s%3Aa

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(biomaRt)
## Warning: package 'biomaRt' was built under R version 4.0.3
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.0.3
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.0.3
library(limma)
library(seqinr)
## 
## Attaching package: 'seqinr'
## The following object is masked from 'package:limma':
## 
##     zscore
## The following object is masked from 'package:biomaRt':
## 
##     getSequence
## The following object is masked from 'package:dplyr':
## 
##     count
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
library(RColorBrewer)
library(pheatmap)
library(org.Hs.eg.db) 
## Loading required package: AnnotationDbi
## Warning: package 'AnnotationDbi' was built under R version 4.0.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.0.3
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.0.3
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.0.3
## 
## clusterProfiler v3.18.1  For help: https://guangchuangyu.github.io/software/clusterProfiler
## 
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
library(ComplexHeatmap)
## Warning: package 'ComplexHeatmap' was built under R version 4.0.3
## ========================================
## ComplexHeatmap version 2.6.2
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). 90% of the arguments
##    in the original pheatmap() are identically supported in the new function. You 
##    can still use the original function by explicitly calling pheatmap::pheatmap().
## 
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
## 
##     pheatmap
metadata <- read.delim("../SraRunTable.txt",sep = ",")
#metadata <- metadata[,c("GEO_Accession..exp.","source_name","Treatment","Cell_Line","Time_point")]
#metadata <- metadata[order(metadata$Run),]
metadata <- metadata[order(metadata$Sample.Name),]
metadata <- unique(metadata[,c("Sample.Name","source_name","Treatment")])
rownames(metadata) <- seq(1:110)
head(metadata)
##   Sample.Name                    source_name                   Treatment
## 1  GSM4432378        Mock treated NHBE cells              Mock treatment
## 2  GSM4432379        Mock treated NHBE cells              Mock treatment
## 3  GSM4432380        Mock treated NHBE cells              Mock treatment
## 4  GSM4432381 SARS-CoV-2 infected NHBE cells SARS-CoV-2 infected (MOI 2)
## 5  GSM4432382 SARS-CoV-2 infected NHBE cells SARS-CoV-2 infected (MOI 2)
## 6  GSM4432383 SARS-CoV-2 infected NHBE cells SARS-CoV-2 infected (MOI 2)
data <- read.delim("../Counts_all.tsv")
rownames(data) <- data$X
data <- data[,-1]
head(data)
##           Series1_NHBE_Mock_1 Series1_NHBE_Mock_2 Series1_NHBE_Mock_3
## DDX11L1                     0                   0                   0
## WASH7P                     29                  24                  23
## FAM138A                     0                   0                   0
## FAM138F                     0                   0                   0
## OR4F5                       0                   0                   0
## LOC729737                 112                 119                 113
##           Series1_NHBE_SARS.CoV.2_1 Series1_NHBE_SARS.CoV.2_2
## DDX11L1                           0                         0
## WASH7P                           34                        19
## FAM138A                           0                         0
## FAM138F                           0                         0
## OR4F5                             0                         0
## LOC729737                       127                        84
##           Series1_NHBE_SARS.CoV.2_3 Series2_A549_Mock_1 Series2_A549_Mock_2
## DDX11L1                           0                   0                   0
## WASH7P                           44                  68                  43
## FAM138A                           0                   0                   0
## FAM138F                           0                   0                   0
## OR4F5                             0                   0                   0
## LOC729737                       270                  11                   3
##           Series2_A549_Mock_3 Series2_A549_SARS.CoV.2_1
## DDX11L1                     0                         0
## WASH7P                     33                        65
## FAM138A                     0                         0
## FAM138F                     0                         0
## OR4F5                       0                         0
## LOC729737                   6                         8
##           Series2_A549_SARS.CoV.2_2 Series2_A549_SARS.CoV.2_3
## DDX11L1                           1                         1
## WASH7P                           79                        48
## FAM138A                           0                         0
## FAM138F                           0                         0
## OR4F5                             0                         0
## LOC729737                        10                        10
##           Series3_A549_Mock_1 Series3_A549_Mock_2 Series3_A549_RSV_1
## DDX11L1                     1                   0                  0
## WASH7P                    184                 128                 51
## FAM138A                     0                   0                  0
## FAM138F                     0                   0                  0
## OR4F5                       0                   0                  0
## LOC729737                 108                  95                 37
##           Series3_A549_RSV_2 Series4_A549_Mock_1 Series4_A549_Mock_2
## DDX11L1                    0                   0                   0
## WASH7P                    43                  15                  12
## FAM138A                    0                   0                   0
## FAM138F                    0                   0                   0
## OR4F5                      0                   0                   0
## LOC729737                 11                   1                   5
##           Series4_A549_IAV_1 Series4_A549_IAV_2 Series5_A549_Mock_1
## DDX11L1                    0                  0                   0
## WASH7P                     3                  3                  64
## FAM138A                    0                  0                   0
## FAM138F                    0                  0                   0
## OR4F5                      0                  0                   0
## LOC729737                  0                  2                   7
##           Series5_A549_Mock_2 Series5_A549_Mock_3 Series5_A549_SARS.CoV.2_1
## DDX11L1                     0                   0                         0
## WASH7P                     53                  37                        38
## FAM138A                     0                   0                         0
## FAM138F                     0                   0                         0
## OR4F5                       0                   0                         0
## LOC729737                  14                  11                         1
##           Series5_A549_SARS.CoV.2_2 Series5_A549_SARS.CoV.2_3
## DDX11L1                           0                         0
## WASH7P                           47                        65
## FAM138A                           0                         0
## FAM138F                           0                         0
## OR4F5                             0                         0
## LOC729737                        13                         4
##           Series6_A549.ACE2_Mock_1 Series6_A549.ACE2_Mock_2
## DDX11L1                          0                        0
## WASH7P                          37                        4
## FAM138A                          0                        0
## FAM138F                          0                        0
## OR4F5                            0                        0
## LOC729737                       13                        2
##           Series6_A549.ACE2_Mock_3 Series6_A549.ACE2_SARS.CoV.2_1
## DDX11L1                          0                              0
## WASH7P                          22                              1
## FAM138A                          0                              0
## FAM138F                          0                              0
## OR4F5                            0                              0
## LOC729737                       18                              9
##           Series6_A549.ACE2_SARS.CoV.2_2 Series6_A549.ACE2_SARS.CoV.2_3
## DDX11L1                                0                              0
## WASH7P                                 2                              4
## FAM138A                                0                              0
## FAM138F                                0                              0
## OR4F5                                  0                              0
## LOC729737                              2                              1
##           Series7_Calu3_Mock_1 Series7_Calu3_Mock_2 Series7_Calu3_Mock_3
## DDX11L1                      0                    0                    0
## WASH7P                      25                   60                   84
## FAM138A                      0                    0                    0
## FAM138F                      0                    0                    0
## OR4F5                        0                    0                    0
## LOC729737                   65                  184                  435
##           Series7_Calu3_SARS.CoV.2_1 Series7_Calu3_SARS.CoV.2_2
## DDX11L1                            1                          0
## WASH7P                            47                         32
## FAM138A                            0                          0
## FAM138F                            0                          0
## OR4F5                              0                          0
## LOC729737                        271                        137
##           Series7_Calu3_SARS.CoV.2_3 Series8_A549_Mock_1 Series8_A549_Mock_2
## DDX11L1                            0                   0                   0
## WASH7P                            41                  68                  17
## FAM138A                            0                   0                   0
## FAM138F                            0                   0                   0
## OR4F5                              0                   0                   0
## LOC729737                        265                   4                   3
##           Series8_A549_Mock_3 Series8_A549_RSV_1 Series8_A549_RSV_2
## DDX11L1                     0                  0                  0
## WASH7P                     21                 18                  9
## FAM138A                     0                  0                  0
## FAM138F                     0                  0                  0
## OR4F5                       0                  0                  0
## LOC729737                   5                 13                  9
##           Series8_A549_RSV_3 Series8_A549_HPIV3_3 Series8_A549_HPIV3_2
## DDX11L1                    1                    0                    0
## WASH7P                    28                   12                   23
## FAM138A                    0                    0                    0
## FAM138F                    0                    0                    0
## OR4F5                      0                    0                    0
## LOC729737                  9                    3                    5
##           Series8_A549_HPIV3_1 Series9_NHBE_Mock_1 Series9_NHBE_Mock_2
## DDX11L1                      0                   0                   0
## WASH7P                      14                  57                  58
## FAM138A                      0                   0                   0
## FAM138F                      0                   0                   0
## OR4F5                        0                   0                   0
## LOC729737                    5                  58                  51
##           Series9_NHBE_Mock_3 Series9_NHBE_Mock_4 Series9_NHBE_IAV_1
## DDX11L1                     0                   0                  0
## WASH7P                     53                  89                102
## FAM138A                     0                   0                  0
## FAM138F                     0                   0                  0
## OR4F5                       0                   0                  0
## LOC729737                  44                  93                107
##           Series9_NHBE_IAV_2 Series9_NHBE_IAV_3 Series9_NHBE_IAV_4
## DDX11L1                    0                  0                  0
## WASH7P                    26                 21                  7
## FAM138A                    0                  0                  0
## FAM138F                    0                  0                  0
## OR4F5                      0                  0                  0
## LOC729737                 37                 30                 10
##           Series9_NHBE_IAVdNS1_1 Series9_NHBE_IAVdNS1_2 Series9_NHBE_IAVdNS1_3
## DDX11L1                        0                      0                      0
## WASH7P                        41                     56                     36
## FAM138A                        0                      0                      0
## FAM138F                        0                      0                      0
## OR4F5                          0                      0                      0
## LOC729737                     52                     52                     42
##           Series9_NHBE_IAVdNS1_4 Series9_NHBE_IFNB_4h_1 Series9_NHBE_IFNB_4h_2
## DDX11L1                        0                      0                      0
## WASH7P                       131                     72                     66
## FAM138A                        0                      0                      0
## FAM138F                        0                      0                      0
## OR4F5                          0                      0                      0
## LOC729737                    101                     94                    102
##           Series9_NHBE_IFNB_6h_1 Series9_NHBE_IFNB_6h_2 Series9_NHBE_IFNB_12h_1
## DDX11L1                        0                      0                       0
## WASH7P                        46                     35                      48
## FAM138A                        0                      0                       0
## FAM138F                        0                      0                       0
## OR4F5                          0                      0                       0
## LOC729737                     52                     41                      41
##           Series9_NHBE_IFNB_12h_2 Series15_HealthyLungBiopsy_2
## DDX11L1                         0                            0
## WASH7P                         46                          261
## FAM138A                         0                            0
## FAM138F                         0                            0
## OR4F5                           0                            0
## LOC729737                      60                           15
##           Series15_HealthyLungBiopsy_1 Series15_COVID19Lung_2
## DDX11L1                              0                      0
## WASH7P                             140                      0
## FAM138A                              0                      0
## FAM138F                              0                      0
## OR4F5                                0                      0
## LOC729737                           70                     17
##           Series15_COVID19Lung_1 Series16_A549.ACE2_Mock_1
## DDX11L1                        0                         0
## WASH7P                         0                         0
## FAM138A                        0                         0
## FAM138F                        0                         0
## OR4F5                          0                         0
## LOC729737                      0                         1
##           Series16_A549.ACE2_Mock_2 Series16_A549.ACE2_Mock_3
## DDX11L1                           0                         0
## WASH7P                           11                         7
## FAM138A                           0                         0
## FAM138F                           0                         0
## OR4F5                             0                         0
## LOC729737                         2                         2
##           Series16_A549.ACE2_SARS.CoV.2_1 Series16_A549.ACE2_SARS.CoV.2_2
## DDX11L1                                 0                               0
## WASH7P                                  2                               6
## FAM138A                                 0                               0
## FAM138F                                 0                               0
## OR4F5                                   0                               0
## LOC729737                               0                               0
##           Series16_A549.ACE2_SARS.CoV.2_3 Series16_A549.ACE2_SARS.CoV.2_Rux_1
## DDX11L1                                 0                                   0
## WASH7P                                  5                                  12
## FAM138A                                 0                                   0
## FAM138F                                 0                                   0
## OR4F5                                   0                                   0
## LOC729737                               1                                   4
##           Series16_A549.ACE2_SARS.CoV.2_Rux_2
## DDX11L1                                     0
## WASH7P                                      6
## FAM138A                                     0
## FAM138F                                     0
## OR4F5                                       0
## LOC729737                                   0
##           Series16_A549.ACE2_SARS.CoV.2_Rux_3
## DDX11L1                                     0
## WASH7P                                      8
## FAM138A                                     0
## FAM138F                                     0
## OR4F5                                       0
## LOC729737                                   2

S1 NHBE.Sars (MOI 2)

new.data <- data[,1:6]
new.meta <- metadata[1:6,]
new.meta$source_name <- as.factor(new.meta$source_name)

##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##   (Intercept) source_nameSARS-CoV-2 infected NHBE cells
## 1           1                                         0
## 2           1                                         0
## 3           1                                         0
## 4           1                                         1
## 5           1                                         1
## 6           1                                         1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5486076
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.1 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.1, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.1,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                              
## [2] "source_nameSARS-CoV-2 infected NHBE cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
NHBE.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected NHBE cells", number=nrow(dge$counts)) 
R_gene.names<- NHBE.sars.name[NHBE.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##            logFC  AveExpr        t      P.Value    adj.P.Val        B
## SAA2    2.417163 4.969914 21.61939 1.876272e-11 2.243646e-07 16.42017
## CCL20   3.139099 4.212050 18.49831 1.301616e-10 7.782365e-07 14.36587
## INHBA   1.824571 6.232337 16.89001 3.993195e-10 9.694929e-07 13.76812
## S100A8  1.896605 6.707539 16.76486 4.375037e-10 9.694929e-07 13.68269
## TNFAIP3 1.618827 7.202701 16.62059 4.864490e-10 9.694929e-07 13.57575
## IL36G   2.750377 3.778832 17.03467 3.595973e-10 9.694929e-07 13.38953
## SAA1    2.247018 7.549586 15.76323 9.301177e-10 1.384014e-06 12.92361
## TNIP1   1.268120 8.310385 15.76837 9.264270e-10 1.384014e-06 12.91639
## KRT6B   1.550960 7.509390 15.61249 1.045847e-09 1.384014e-06 12.80312
## CXCL1   1.427990 6.904826 15.48328 1.157396e-09 1.384014e-06 12.70898

S2 A549.Sars (MOI 0.2)

new.data <- data[,7:12]
new.meta <- metadata[7:12,]
new.meta$source_name <- as.factor(new.meta$source_name)

##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept) source_nameSARS-CoV-2 infected A549 cells
## 7            1                                         0
## 8            1                                         0
## 9            1                                         0
## 10           1                                         1
## 11           1                                         1
## 12           1                                         1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5603982
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.2 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.2, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.2,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                              
## [2] "source_nameSARS-CoV-2 infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
A549.sars.0.2.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells", number=nrow(dge$counts)) 
R_gene.names<- A549.sars.0.2.name[A549.sars.0.2.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##          logFC  AveExpr        t      P.Value    adj.P.Val        B
## IFI6  4.330863 5.212637 36.55897 4.711729e-15 5.755377e-11 21.85594
## ISG15 3.751729 4.023533 28.03836 1.705803e-13 1.041819e-09 18.71222
## IRF9  2.153629 5.286352 22.80394 2.734200e-12 6.679650e-09 18.05121
## IFIT1 4.283022 4.063577 24.51269 1.039055e-12 3.578297e-09 17.41369
## PARP9 2.067938 5.276501 20.82052 9.201990e-12 1.873372e-08 17.02836
## OAS1  1.542308 6.743258 20.19372 1.381461e-11 2.410650e-08 16.91588
## OAS3  1.381409 8.183566 18.58645 4.143981e-11 6.327341e-08 15.94093
## MX1   5.025758 3.141509 24.29392 1.171772e-12 3.578297e-09 15.84208
## IRF7  3.148346 3.813212 18.36691 4.847793e-11 6.579532e-08 14.88902
## STAT1 1.317132 8.311213 15.66700 3.898759e-10 4.609216e-07 13.74166
dim(A549.sars.0.2.name)
## [1] 12215     6
dim(dge)
## [1] 12215     6

S3 A549.RSV (MOI 15)

new.data <- data[,13:16]
new.meta <- metadata[13:16,]
new.meta$source_name <- as.factor(new.meta$source_name)

##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept) source_nameRSV infected A549 cells
## 13           1                                  0
## 14           1                                  0
## 15           1                                  1
## 16           1                                  1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=2
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.584163
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.3 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.3, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.3,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                        "source_nameRSV infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
A549.RSV.name = topTable(fit2, coef="source_nameRSV infected A549 cells", number=nrow(dge$counts)) 
R_gene.names<- A549.RSV.name[A549.RSV.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##           logFC  AveExpr        t       P.Value     adj.P.Val        B
## IFIT1  5.092480 7.242044 25.77272 1.283181e-144 1.633875e-140 323.1601
## MX1    5.564444 5.573895 24.26439 1.345748e-128 8.567703e-125 285.2392
## ISG15  4.536756 7.769077 23.56707 1.678089e-121 7.122371e-118 269.0784
## IFIT3  5.019121 5.975730 23.41241 5.914634e-120 1.882776e-116 265.2462
## OASL   5.527189 5.101092 22.70079 5.820997e-113 1.482375e-109 248.6107
## IFIT2  4.831337 5.167464 20.79999  2.686254e-95  5.700678e-92 207.7003
## IRF7   3.933973 6.821339 20.05523  8.816586e-89  1.603737e-85 192.8414
## HELZ2  3.826506 7.211111 19.81082  1.081018e-86  1.720575e-83 188.0098
## BATF2  4.485574 4.702147 18.41742  2.937129e-75  4.155385e-72 161.2441
## PARP10 4.027628 5.332585 18.24351  6.892530e-74  8.776258e-71 158.2173
dim(A549.RSV.name)
## [1] 12733     6
dim(dge)
## [1] 12733     4

S4 A549.IAV (MOI 5)

new.data <- data[,17:20]
new.meta <- metadata[17:20,]
new.meta$source_name <- as.factor(new.meta$source_name)
new.meta$source_name <- relevel(new.meta$source_name, "Mock treated A549 cells")
str(new.meta)
## 'data.frame':    4 obs. of  3 variables:
##  $ Sample.Name: chr  "GSM4432394" "GSM4432395" "GSM4432396" "GSM4432397"
##  $ source_name: Factor w/ 2 levels "Mock treated A549 cells",..: 1 1 2 2
##  $ Treatment  : chr  "Mock treatment" "Mock treatment" "IAV infected (MOI 5)" "IAV infected (MOI 5)"
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept) source_nameIAV infected A549 cells
## 17           1                                  0
## 18           1                                  0
## 19           1                                  1
## 20           1                                  1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=2
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5629215
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.4 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.4, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.4,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                        "source_nameIAV infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
A549.IAV.name = topTable(fit2, coef="source_nameIAV infected A549 cells", number=nrow(dge$counts)) 
R_gene.names<- A549.RSV.name[A549.IAV.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## [1] logFC     AveExpr   t         P.Value   adj.P.Val B        
## <0 rows> (or 0-length row.names)

S5 A549.Sars (MOI 2)

new.data <- data[,21:26]
new.meta <- metadata[21:26,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame':    6 obs. of  3 variables:
##  $ Sample.Name: chr  "GSM4462336" "GSM4462337" "GSM4462338" "GSM4462339" ...
##  $ source_name: Factor w/ 2 levels "Mock treated A549 cells",..: 1 1 1 2 2 2
##  $ Treatment  : chr  "Mock treatment" "Mock treatment" "Mock treatment" "SARS-CoV-2 infected (MOI 2)" ...
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept) source_nameSARS-CoV-2 infected A549 cells
## 21           1                                         0
## 22           1                                         0
## 23           1                                         0
## 24           1                                         1
## 25           1                                         1
## 26           1                                         1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5620957
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.5 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.5, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.5,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                              
## [2] "source_nameSARS-CoV-2 infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
A549.sars.2.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells", number=nrow(dge$counts)) 
R_gene.names <- A549.sars.2.name[A549.sars.2.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##              logFC   AveExpr         t      P.Value    adj.P.Val        B
## STC2      4.020526  6.428324  41.57514 1.343871e-14 1.646511e-10 23.33235
## GPX2     -2.773737  8.287283 -35.43363 9.442482e-14 4.542013e-10 21.95389
## C15orf48  2.757710  6.531311  34.11016 1.501073e-13 4.542013e-10 21.41921
## FASN     -2.252676  8.106420 -33.70373 1.736858e-13 4.542013e-10 21.39276
## LAMC2     3.702878  7.288538  33.52408 1.853580e-13 4.542013e-10 21.25640
## MCM5     -2.384744  6.522076 -32.68780 2.520478e-13 5.146816e-10 20.99003
## PEG10    -2.331746  6.934755 -31.73875 3.606314e-13 5.363177e-10 20.68207
## TK1      -2.224042  6.762880 -31.65487 3.724192e-13 5.363177e-10 20.64664
## KRT8     -2.225048 10.699966 -30.77140 5.252867e-13 5.363177e-10 20.35035
## ICAM1     2.792513  6.189426  30.92044 4.953453e-13 5.363177e-10 20.29775

S6 A549.ACE2.sars (MOI 0.2)

new.data <- data[,27:32]
new.meta <- metadata[27:32,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame':    6 obs. of  3 variables:
##  $ Sample.Name: chr  "GSM4462342" "GSM4462343" "GSM4462344" "GSM4462345" ...
##  $ source_name: Factor w/ 2 levels "Mock treated A549 cells trasnduced with a vector expressing human ACE2",..: 1 1 1 2 2 2
##  $ Treatment  : chr  "Mock treatment" "Mock treatment" "Mock treatment" "SARS-CoV-2 infected (MOI 0.2)" ...
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept)
## 27           1
## 28           1
## 29           1
## 30           1
## 31           1
## 32           1
##    source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2
## 27                                                                                        0
## 28                                                                                        0
## 29                                                                                        0
## 30                                                                                        1
## 31                                                                                        1
## 32                                                                                        1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5582878
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.6 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.6, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.6,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                                                                             
## [2] "source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
A549.ACE2.0.2.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2", number=nrow(dge$counts)) 
R_gene.names <- A549.ACE2.0.2.sars.name[A549.ACE2.0.2.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##             logFC  AveExpr        t      P.Value    adj.P.Val        B
## JUNB     3.523119 7.140814 33.96592 1.407961e-25 1.713348e-21 48.22323
## NFKBIA   3.270176 8.573796 31.08024 1.900859e-24 7.782733e-21 45.86466
## CCNL1    2.845292 7.692370 30.09558 4.868825e-24 1.481218e-20 44.89106
## TNFAIP3  3.458063 6.466889 28.85271 1.664489e-23 3.538519e-20 43.52932
## CXCL2    6.886796 5.109736 31.07033 1.918662e-24 7.782733e-21 43.49306
## ARRDC3   3.506089 6.122677 28.80610 1.744688e-23 3.538519e-20 43.39858
## DDIT3    3.580606 5.877895 28.40105 2.634018e-23 4.579052e-20 42.93331
## PPP1R15A 3.227209 8.065816 27.92096 4.322787e-23 6.575499e-20 42.76773
## IER5     2.330592 7.899642 25.51727 5.850825e-22 7.910966e-19 40.18662
## BBC3     2.720648 7.599000 24.81725 1.303591e-21 1.586340e-18 39.38640

S7 Calu.sars (MOI 2)

new.data <- data[,33:38]
new.meta <- metadata[33:38,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame':    6 obs. of  3 variables:
##  $ Sample.Name: chr  "GSM4462348" "GSM4462349" "GSM4462350" "GSM4462351" ...
##  $ source_name: Factor w/ 2 levels "Mock treated Calu-3 cells",..: 1 1 1 2 2 2
##  $ Treatment  : chr  "Mock treatment" "Mock treatment" "Mock treatment" "SARS-CoV-2 infected (MOI 2)" ...
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept) source_nameSARS-CoV-2 infected Calu-3 cells
## 33           1                                           0
## 34           1                                           0
## 35           1                                           0
## 36           1                                           1
## 37           1                                           1
## 38           1                                           1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5648025
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.7 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.7, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.7,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                                
## [2] "source_nameSARS-CoV-2 infected Calu-3 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
calu.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected Calu-3 cells", number=nrow(dge$counts)) 
R_gene.names <- calu.sars.name[calu.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##             logFC  AveExpr        t      P.Value    adj.P.Val        B
## OAS2     4.866901 7.523400 45.13101 1.878023e-17 1.244333e-13 30.12248
## IFIT2    5.342672 8.193275 44.90876 2.021498e-17 1.244333e-13 30.11478
## IFIT1    4.537755 7.548068 42.63578 4.384391e-17 1.496508e-13 29.37067
## IFIT3    4.926069 7.053995 41.98359 5.516330e-17 1.496508e-13 29.08442
## RSAD2    5.120329 6.094588 41.71122 6.077931e-17 1.496508e-13 28.78729
## IL1A     5.238155 6.887289 40.32193 1.006530e-16 2.065232e-13 28.49478
## PPP1R15A 3.904741 8.292272 37.59917 2.848085e-16 5.008968e-13 27.67445
## MX1      3.872235 7.747760 36.96448 3.668257e-16 5.017769e-13 27.41033
## TXNIP    3.426616 8.950076 36.16208 5.082591e-16 5.759155e-13 27.13415
## ICAM1    3.668712 9.581654 35.57920 6.470044e-16 5.759155e-13 26.90578

S8 A549.RSV2.HPIV3 (MOI 2)

new.data <- data[,39:47]
new.meta <- metadata[39:47,]
new.meta$source_name <- as.factor(new.meta$source_name)
str(new.meta)
## 'data.frame':    9 obs. of  3 variables:
##  $ Sample.Name: chr  "GSM4462354" "GSM4462355" "GSM4462356" "GSM4462357" ...
##  $ source_name: Factor w/ 3 levels "HPIV3 infected A549 cells",..: 2 2 2 3 3 3 1 1 1
##  $ Treatment  : chr  "Mock treatment" "Mock treatment" "Mock treatment" "RSV infected (MOI 2)" ...
new.meta$source_name <- relevel(new.meta$source_name,"Mock treated A549 cells")
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept) source_nameHPIV3 infected A549 cells
## 39           1                                    0
## 40           1                                    0
## 41           1                                    0
## 42           1                                    0
## 43           1                                    0
## 44           1                                    0
## 45           1                                    1
## 46           1                                    1
## 47           1                                    1
##    source_nameRSV infected A549 cells
## 39                                  0
## 40                                  0
## 41                                  0
## 42                                  1
## 43                                  1
## 44                                  1
## 45                                  0
## 46                                  0
## 47                                  0
## attr(,"assign")
## [1] 0 1 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=5
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5437446
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.8 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.8, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.8,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                         
## [2] "source_nameHPIV3 infected A549 cells"
## [3] "source_nameRSV infected A549 cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
A549.RSV.2.name = topTable(fit2, coef="source_nameRSV infected A549 cells", number=nrow(dge$counts)) 
A549.HPIV3.3.name = topTable(fit2, coef="source_nameHPIV3 infected A549 cells", number=nrow(dge$counts)) 
R_gene.names <- A549.RSV.2.name[A549.RSV.2.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##           logFC  AveExpr        t      P.Value    adj.P.Val        B
## IFIT3  6.009547 6.850910 30.19556 5.715170e-10 2.606897e-06 12.68200
## MX1    6.577520 6.385205 30.35499 5.466270e-10 2.606897e-06 12.51551
## IFIT2  7.342778 7.032180 29.68654 6.598625e-10 2.606897e-06 12.38024
## GBP4   6.979492 3.045794 27.25794 1.356882e-09 4.020440e-06 11.45091
## LAMC2  4.578369 7.313385 21.59082 9.648150e-09 1.270215e-05 10.68643
## OASL   7.970537 5.874429 22.63680 6.485822e-09 1.270215e-05 10.64700
## HERC5  4.390229 6.311126 21.01905 1.208309e-08 1.270215e-05 10.46532
## GBP1   6.142683 5.069691 21.67427 9.340912e-09 1.270215e-05 10.45643
## GLIPR1 3.694440 5.701637 20.86318 1.286077e-08 1.270215e-05 10.44888
## IFI44  7.954380 2.885669 21.95598 8.381613e-09 1.270215e-05 10.28142

S9 NHBE.series (MOI 2)

new.data <- data[,48:65]
new.meta <- metadata[48:65,]
new.meta$source_name <- as.factor(new.meta$source_name)
new.meta$source_name <- relevel(new.meta$source_name,"Mock treated NHBE cells")
##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##    (Intercept) source_namehuman IFNB treated NHBE cells
## 48           1                                        0
## 49           1                                        0
## 50           1                                        0
## 51           1                                        0
## 52           1                                        0
## 53           1                                        0
## 54           1                                        0
## 55           1                                        0
## 56           1                                        0
## 57           1                                        0
## 58           1                                        0
## 59           1                                        0
## 60           1                                        1
## 61           1                                        1
## 62           1                                        1
## 63           1                                        1
## 64           1                                        1
## 65           1                                        1
##    source_nameIAV infected NHBE cells source_nameIAVdNS1 infected NHBE cells
## 48                                  0                                      0
## 49                                  0                                      0
## 50                                  0                                      0
## 51                                  0                                      0
## 52                                  1                                      0
## 53                                  1                                      0
## 54                                  1                                      0
## 55                                  1                                      0
## 56                                  0                                      1
## 57                                  0                                      1
## 58                                  0                                      1
## 59                                  0                                      1
## 60                                  0                                      0
## 61                                  0                                      0
## 62                                  0                                      0
## 63                                  0                                      0
## 64                                  0                                      0
## 65                                  0                                      0
## attr(,"assign")
## [1] 0 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=9
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5401202
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.9 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.9, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.9,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                             
## [2] "source_namehuman IFNB treated NHBE cells"
## [3] "source_nameIAV infected NHBE cells"      
## [4] "source_nameIAVdNS1 infected NHBE cells"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
NHBE.IAV.name = topTable(fit2, coef="source_nameIAV infected NHBE cells", number=nrow(dge$counts)) 
NHBE.IAV.dNS1.name = topTable(fit2, coef="source_nameIAVdNS1 infected NHBE cells", number=nrow(dge$counts)) 
NHBE.IFNB.name = topTable(fit2, coef="source_namehuman IFNB treated NHBE cells", number=nrow(dge$counts)) 
R_gene.names <- NHBE.IAV.name[NHBE.IAV.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
##              logFC  AveExpr         t      P.Value    adj.P.Val        B
## MIR22HG  1.0223077 6.389417  8.582620 3.740674e-08 0.0004403895 8.963597
## PDIA6    0.7921531 8.137763  7.591399 2.524154e-07 0.0011274978 7.129707
## HELZ2    2.1917029 7.524074  7.383454 3.830792e-07 0.0011274978 6.720001
## IRF9     1.3456132 6.044571  7.262176 4.899157e-07 0.0011535554 6.478208
## JUND    -1.6703224 5.737263 -7.076829 7.162305e-07 0.0012789148 6.122546
## UPK3B   -2.0990846 1.723997 -7.434852 3.453607e-07 0.0011274978 5.962404
## MAL     -3.3926298 3.883394 -6.931523 9.677339e-07 0.0012789148 5.786257
## MX1      4.4436771 7.610313  6.926610 9.776806e-07 0.0012789148 5.703800
## ECM1    -1.2508931 7.376249 -6.820097 1.221280e-06 0.0013071022 5.596559
## EPN3    -1.1193144 4.644277 -6.772787 1.348775e-06 0.0013232609 5.514740

S16 A549.ACE2.sars.series (MOI 2)

new.data <- data[,71:76]
new.meta <- metadata[102:107,]
new.meta$source_name <- as.factor(new.meta$source_name)

##### design matrix
new.design <- model.matrix(~ source_name, data = new.meta)
new.design
##     (Intercept)
## 102           1
## 103           1
## 104           1
## 105           1
## 106           1
## 107           1
##     source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2
## 102                                                                                        0
## 103                                                                                        0
## 104                                                                                        0
## 105                                                                                        1
## 106                                                                                        1
## 107                                                                                        1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$source_name
## [1] "contr.treatment"
##### filtering
# non-specific filtering
dge <- DGEList(counts = new.data)
counts.filt <- rowSums(cpm(dge) > 1) >=3
# Check what fraction of genes remain
sum(counts.filt)/nrow(dge)
## [1] 0.5828784
# Remove low expressed genes.
dge <- dge[counts.filt,,keep.lib.sizes=T] 

##### TMM normalization
dge <- calcNormFactors(dge, method="TMM")


##### VOOM
v.16 <- voom(dge,new.design,plot=TRUE) 

##### MDS plot
plotMDS(v.16, main="plotMDS(v)",cex=0.5)

##### fit linear model
# finding differential expression by fitting linear models
fit <- lmFit(v.16,new.design)
# calculating the statistics
fit2  <- eBayes(fit)
colnames(fit2)
## [1] "(Intercept)"                                                                             
## [2] "source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2"
# MA plot
#plotMD(fit2, main="plotMD", cex=0.5, coef="genotypePbxMut")


##### list top differentially expressed genes
A549.ACE2.sars.name = topTable(fit2, coef="source_nameSARS-CoV-2 infected A549 cells trasnduced with a vector expressing human ACE2", number=nrow(dge$counts)) 
R_gene.names <- A549.ACE2.sars.name[A549.ACE2.sars.name$adj.P.Val <= 0.05,]
head(R_gene.names,10)
## [1] logFC     AveExpr   t         P.Value   adj.P.Val B        
## <0 rows> (or 0-length row.names)
##### Heatmap df id
df.1 <- A549.ACE2.sars.name %>% mutate(gene_id = rownames(A549.ACE2.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.2 <- A549.RSV.name %>% mutate(gene_id = rownames(A549.RSV.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.3 <- A549.IAV.name %>% mutate(gene_id = rownames(A549.IAV.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.4 <- A549.ACE2.0.2.sars.name %>% mutate(gene_id = rownames(A549.ACE2.0.2.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.5 <- A549.ACE2.sars.name %>% mutate(gene_id = rownames(A549.ACE2.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.6 <- calu.sars.name %>% mutate(gene_id = rownames(calu.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.7 <- A549.RSV.2.name %>% mutate(gene_id = rownames(A549.RSV.2.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )
df.8 <- A549.HPIV3.3.name %>% mutate(gene_id = rownames(A549.HPIV3.3.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )

## 05.17
df.9 <- NHBE.sars.name %>% mutate(gene_id = rownames(NHBE.sars.name)) %>% filter(adj.P.Val <= 0.05 & (logFC >= log2(4.0) | logFC <= log2(1/4.0)) )


df.id <- unique(c(df.1$gene_id,df.2$gene_id,df.3$gene_id,df.4$gene_id,df.5$gene_id,df.6$gene_id,df.7$gene_id,df.8$gene_id,df.9$gene_id))
A549.ACE2.sars.expr <- rowSums(v.16$E[,4:6])/3 
A549.RSV.expr <- rowSums(v.3$E[,3:4])/2
A549.IAV.expr <- rowSums(v.4$E[,3:4])/2
A549.sars.2.expr <- rowSums(v.5$E[,4:6])/3
A549.ACE2.0.2.sars.expr <- rowSums(v.6$E[,4:6])/3
calu.sars.expr <- rowSums(v.7$E[,4:6])/3
A549.RSV.2.expr <- rowSums(v.8$E[,4:6])/3
A549.HPIV3.3.expr <- rowSums(v.8$E[,7:9])/3
A549.ACE2.sars.expr <- A549.ACE2.sars.name$logFC
names(A549.ACE2.sars.expr) <- rownames(A549.ACE2.sars.name)

A549.RSV.expr <- A549.RSV.name$logFC
names(A549.RSV.expr) <- rownames(A549.RSV.name)

A549.IAV.expr <- A549.IAV.name$logFC
names(A549.IAV.expr) <- rownames(A549.IAV.name)

A549.sars.2.expr <- A549.sars.2.name$logFC
names(A549.sars.2.expr) <- rownames(A549.sars.2.name)

A549.ACE2.0.2.sars.expr <- A549.ACE2.0.2.sars.name$logFC
names(A549.ACE2.0.2.sars.expr) <- rownames(A549.ACE2.0.2.sars.name)

calu.sars.expr <- calu.sars.name$logFC
names(calu.sars.expr) <- rownames(calu.sars.name)

A549.RSV.2.expr <- A549.RSV.2.name$logFC
names(A549.RSV.2.expr) <- rownames(A549.RSV.2.name)

A549.HPIV3.3.expr <- A549.HPIV3.3.name$logFC
names(A549.HPIV3.3.expr) <- rownames(A549.HPIV3.3.name)

## 05.16
NHBE.sars.expr <- NHBE.sars.name$logFC
names(NHBE.sars.expr) <- rownames(NHBE.sars.name)

expr.df.ind <- unique(c(rownames(A549.ACE2.sars.name),rownames(A549.RSV.name),rownames(A549.IAV.name),rownames(A549.sars.2.name),rownames(A549.ACE2.0.2.sars.name),rownames(calu.sars.name),rownames(A549.RSV.2.name),rownames(A549.HPIV3.3.name)))
expr <- as.data.frame(matrix(0,nrow=length(expr.df.ind),ncol = 1))
rownames(expr) <- expr.df.ind
expr$A549.ACE2_sars <- A549.ACE2.sars.expr[row.names(expr)]
expr$A549_RSV_15 <- A549.RSV.expr[row.names(expr)]
expr$A549_IAV <- A549.IAV.expr[row.names(expr)]
expr$A549_sars_2 <- A549.sars.2.expr[row.names(expr)]
expr$A549.ACE2_sars_0.2 <- A549.ACE2.0.2.sars.expr[row.names(expr)]
expr$calu.3_sars <- calu.sars.expr[row.names(expr)]
expr$A549_RSV_2 <- A549.RSV.2.expr[row.names(expr)]
expr$A549_HPIV3 <- A549.HPIV3.3.expr[row.names(expr)]
## 05.16
expr$NHBE.sars.expr <- NHBE.sars.expr[rownames(expr)]
expr <- expr[,-1]


expr[is.na(expr)] <- 0
head(expr)
##           A549.ACE2_sars A549_RSV_15    A549_IAV A549_sars_2 A549.ACE2_sars_0.2
## PYROXD1        1.4445284 -0.19520144  0.54641768  1.32184913          0.5000972
## LOC646903      2.0447325 -0.35385533  0.93490427 -0.55711867          1.7906765
## COL12A1       -0.9698611  0.17288559  0.01182408  1.81695058          0.2521718
## MIR210HG       0.9604034 -0.07048356 -3.03185845 -0.08735310         -1.6524088
## ELOVL2        -1.1202871 -0.89029270  0.72595020  0.01463756         -0.3520592
## ST8SIA4       -1.1714517 -0.49092927 -0.19257813 -1.30514808          0.2583169
##           calu.3_sars A549_RSV_2 A549_HPIV3 NHBE.sars.expr
## PYROXD1     1.5127424  0.5006636  0.3833402    -0.04122459
## LOC646903  -0.3175119  0.0000000  0.0000000     0.00000000
## COL12A1    -0.4787169  1.2087476  1.0735949     0.36340299
## MIR210HG   -1.2358923  0.8117790 -1.8625108     0.19876360
## ELOVL2      0.0000000  0.6855460  0.7769844     0.00000000
## ST8SIA4     0.0000000 -0.1548775  1.0237359     0.00000000
#kk <- rowSums(expr)
#kk.ind <- expr[kk == "0"]
#expr[kk == "0"]

#dexpr <- expr[-kk.ind,]
#dexpr

GO term

biomart download GO term ruls: https://support.bioconductor.org/p/122315/

#library(biomaRt)
#uses human ensembl annotations
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

#gets gene symbol, transcript_id and go_id for all genes annotated with GO:0007507
cytokine <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
                   filters = 'go', values = c('GO:0034097'), mart = ensembl)

innate.immune <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
                   filters = 'go', values = 'GO:0045087', mart = ensembl)

virus <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
                   filters = 'go', values = 'GO:0009615', mart = ensembl)

inflammatory <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
                   filters = 'go', values = 'GO:0006954', mart = ensembl)

type.I <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
                   filters = 'go',values = c('GO:0034340'),mart = ensembl)

type.III <- getBM(attributes=c('hgnc_symbol', 'external_gene_name', 'go_id', 'name_1006'),
                   filters = 'go',values = c('GO:0034342'),mart = ensembl)


#b <- listFilters(mart = ensembl)
#a <- listAttributes(mart = ensembl)
#a[a$name == "go",]
#b[b$name == "name",]
cytokine <- cytokine[cytokine$go_id == 'GO:0034097',"external_gene_name"]
innate.immune <- innate.immune[innate.immune$go_id == 'GO:0045087',"external_gene_name"]
virus <- virus[virus$go_id == 'GO:0009615',"external_gene_name"]
inflammatory <- inflammatory[inflammatory$go_id == 'GO:0006954',"external_gene_name"]
type.I.III <- c(type.I[type.I$go_id == 'GO:0034340',"external_gene_name"], type.III[type.III$go_id == 'GO:0034342',"external_gene_name"])

anno.id <- unique(c(cytokine,innate.immune,virus,inflammatory,type.I.III))
#length(df.id)
#anna <- as.data.frame(matrix(0,nrow=length(df.id),ncol=5))
#rownames(anna) <- df.id
#anna$V6 <- seq(length(df.id))
dexpr = expr 
ind.anno.df = rownames(dexpr[rownames(dexpr) %in% anno.id, ])

anna <- as.data.frame(matrix(0,nrow=length(ind.anno.df),ncol=5))
rownames(anna) <- ind.anno.df
anna$V6 <- seq(length(ind.anno.df))
cytokine.ind <- anna$V6[rownames(anna) %in% cytokine]
innate.immune.ind <- anna$V6[rownames(anna) %in% innate.immune]
virus.ind <- anna$V6[rownames(anna) %in% virus]
inflammatory.ind <- anna$V6[rownames(anna) %in% inflammatory]
type.I.III.ind <- anna$V6[rownames(anna) %in% type.I.III]
anna$V1[type.I.III.ind] <- 1
anna$V2[cytokine.ind] <- 1
anna$V3[innate.immune.ind] <- 1
anna$V4[virus.ind] <- 1
anna$V5[inflammatory.ind] <- 1

anna <- anna[,1:5]

anna.ind <- anna[rowSums(anna) != 0,]
anna <- anna.ind
anna.ind <- rownames(anna)


colnames(anna) <- c("IFN I & III","cytokine","innate.immune","response to virus","inflammatory")
anna$cytokine <- as.factor(anna$cytokine)
anna$innate.immune <- as.factor(anna$innate.immune)
anna$`response to virus` <- as.factor(anna$`response to virus`)
anna$inflammatory <- as.factor(anna$inflammatory)
anna$`IFN I & III` <- as.factor(anna$`IFN I & III`)
#anna <- t(anna)
#anna <- as.data.frame(anna)

#anna
#length(anna)
#anna = HeatmapAnnotation(df = anna)
anna_hh = HeatmapAnnotation(df = anna, col = list(`IFN I & III` = c("0" = "white", "1" = "pink"),
                                               cytokine = c("0" =  "white", "1" = "blue"), 
                                               innate.immune = c("0" =  "white", "1" = "red"),
                                               `response to virus` = c("0" =  "white", "1" = "green"),
                                               inflammatory = c("0" =  "white", "1" = "yellow")),
                            which = "column",
                            show_legend = F)

ComplexHeatmap


col.concent = brewer.pal(10, "RdBu")
dexpr = expr 
dexpr = dexpr[rownames(dexpr) %in% df.id, ]
dexpr = as.matrix(dexpr)
dexpr = t(dexpr)
#png("heatmap_GO.png", width=4000,height=800, res=315, pointsize=12)

png("heatmap_0507.png", width=4000,height=800, res=315, pointsize=12)
pheatmap(dexpr, cluster_rows=T, cluster_cols=T, main="FDR < 0.5% & >4-fold", 
            scale = "column",
            gaps_col = gaps_col, 
            clustering_distance_cols = "correlation", 
            cellwidth = 0.5,
      cellheight = 10,
#           cutree_cols = 2, 
            treeheight_col = 5,
      treeheight_row = 5,
      width = 100,
            border_color = FALSE, legend = TRUE, legend_labels = "up-/down- regulated",height=20, 
      show_colnames = F,
color=colorRampPalette(col.concent)(500)) 
dev.off()
dexpr = expr 
#dexpr = dexpr[rownames(dexpr) %in% anna.ind, ]
dexpr = dexpr[rownames(dexpr) %in% ind.anno.df, ]
dexpr = as.matrix(dexpr)
dexpr = t(dexpr)
png("heatmap_0516_2_author_complex.png", width=4000,height=1000, res=315, pointsize=12)
Heatmap(dexpr,bottom_annotation = anna_hh, cluster_columns = T, cluster_rows = T, show_row_names = T,show_column_names = F,
      clustering_distance_columns = "pearson",
      clustering_distance_row = "pearson",
      column_dend_height = unit(0.5,"cm"),row_dend_width = unit(0.5,"cm"))
dev.off()
## quartz_off_screen 
##                 2
#Heatmap(dexpr,bottom_annotation = anna_hh, cluster_columns = T, cluster_rows = T, show_row_names = T,show_column_names = F,clustering_distance_columns = "pearson",column_dend_height = unit(0.5,"cm"),row_dend_width = unit(0.5,"cm"))
#dev.off()